# Load the zipped Instacart training CSV, normalise the column names with
# clean_names(), and drop exact duplicate rows with distinct().
instacart <- distinct(
  clean_names(
    read_csv("./data/instacart_train_data.csv.zip")
  )
)
## Warning in strptime(x, format, tz = tz): unknown timezone 'default/America/
## New_York'
## Parsed with column specification:
## cols(
## order_id = col_integer(),
## product_id = col_integer(),
## add_to_cart_order = col_integer(),
## reordered = col_integer(),
## user_id = col_integer(),
## eval_set = col_character(),
## order_number = col_integer(),
## order_dow = col_integer(),
## order_hour_of_day = col_integer(),
## days_since_prior_order = col_integer(),
## product_name = col_character(),
## aisle_id = col_integer(),
## department_id = col_integer(),
## aisle = col_character(),
## department = col_character()
## )
Column
Department Frequency
# Bar chart of the number of rows per department, with departments ordered
# by their count, rendered as an interactive plotly widget.
barplot_department_ggplot <- instacart %>%
  count(department) %>%
  # Reorder the factor levels by count so the bars appear sorted.
  mutate(department = fct_reorder(department, n)) %>%
  ggplot(aes(x = department, y = n, fill = department)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  # Name the unit_format() arguments: its positional order differs between
  # scales releases (newer ones take `accuracy` first), so passing "k" and
  # 1e-3 positionally is not portable. Counts are shown in thousands ("k").
  scale_y_continuous(
    name = "N",
    labels = scales::unit_format(unit = "k", scale = 1e-3)
  )
ggplotly(barplot_department_ggplot)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
Column
Most Popular Products by Aisle in the Produce Department
# Dodged bar chart of the most frequently occurring products within each
# aisle of the produce department, rendered as an interactive plotly widget.
barplot_popular_produce <- instacart %>%
  filter(department == "produce") %>%
  group_by(aisle) %>%
  count(product_name) %>%
  # Rank explicitly by the count column instead of relying on top_n()'s
  # implicit "last column" default (which also emits "Selecting by n").
  # The data are still grouped by aisle here, so this keeps the top 2 per
  # aisle; top_n() retains ties, so an aisle may contribute more than 2 rows.
  top_n(2, wt = n) %>%
  # Grouping is no longer needed once the per-aisle selection is done.
  ungroup() %>%
  ggplot(aes(x = aisle, y = n, fill = product_name)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Most Popular Products by Aisle in Produce Department")
ggplotly(barplot_popular_produce)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
Distribution of Order Hour of Day by Department
# Box plot of order_hour_of_day for each department, one box per department,
# rendered as an interactive plotly widget.
boxplot_order_hour <- ggplot(
  data = instacart,
  mapping = aes(x = department, y = order_hour_of_day, fill = department)
) +
  geom_boxplot() +
  labs(title = "Order Hour of Day by Department") +
  # Rotate the department labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggplotly(boxplot_order_hour)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`